This code is from the shared github repository by Ostendorf et al. but rearranged to perform and reproduce the same single cell analysis for figures 3e-g specifically: Git: https://github.com/benostendorf/ostendorf_etal_2022 Paper: https://www.nature.com/articles/s41586-022-05344-2
Raw data files are included in the package here and originally obtained from: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE199498
## Import Data and Perform Seurat Workflow
# log-normalize, scale, PCA, UMAP (Louvain)
if (!file.exists("data/Seurat.RDS")) {
# Import data
mat <- readMM("data/DGE.mtx")
cell_meta <- read.delim("data/cell_metadata.csv",
stringsAsFactor = FALSE, sep = ",")
genes <- read.delim("data/all_genes.csv",
stringsAsFactor = FALSE, sep = ",")
# Set row/col parameters and label the GE matrix
cell_meta$bc_wells <- make.unique(cell_meta$bc_wells, sep = "_dup")
rownames(cell_meta) <- cell_meta$bc_index
genes$gene_name <- make.unique(genes$gene_name, sep = "_dup")
colnames(mat) <- genes$gene_name
rownames(mat) <- rownames(cell_meta)
mat_t <- t(mat)
# Remove empty rownames, if they exist
mat_t <- mat_t[(rownames(mat_t) != ""), ]
df_raw <- CreateSeuratObject(mat_t, min_cells = 2, meta.data = cell_meta)
# Setting initial cell class to a single type (instead of plate well numbers), this will change after clustering.
df_raw@meta.data$orig.ident <- factor(rep("df", nrow(df_raw@meta.data)))
Idents(df_raw) <- df_raw@meta.data$orig.ident
# Add % mitochondrial reads, condition and genotype metadata
df_raw[["condition"]] <- gsub("(w*)_\\d*_E\\d", "\\1", df_raw@meta.data$sample)
df_raw[["genotype"]] <- gsub(".*_(E\\d)", "\\1", df_raw@meta.data$sample)
df_raw[["percent.mt"]] <- PercentageFeatureSet(df_raw, pattern = "^mt-")
# Perform subsetting & seurat
## Define thresholds
feature_min <- 150
feature_max <- 7500
count_max <- 40000
max_perc <- 15
# Subset
df <- subset(df_raw,
subset =
nFeature_RNA > feature_min & nFeature_RNA < feature_max &
nCount_RNA < count_max &
percent.mt < max_perc)
# Seurat workflow
df <- NormalizeData(df)
df <- FindVariableFeatures(df)
df <- ScaleData(df)
df <- RunPCA(df)
df <- RunUMAP(df, dims = 1:30)
ElbowPlot(df, ndims = 30)
df <- FindNeighbors(df, dims = 1:30)
df <- FindClusters(df, resolution = 1.4)
df <- BuildClusterTree(df, reorder = TRUE, reorder.numeric = TRUE)
saveRDS(df, "data/Seurat.RDS")
}
df <- readRDS("data/Seurat.RDS")
# Reorder & rename the clusters from running UMAP
rename_clusters <- c(`1` = "Airway epithelial A", `2` = "Airway epithelial B",
`3` = "Granulocytes", `4` = "Myofibroblasts",
`5` = "Lipofibroblasts", `6` = "AT1",
`7` = "misc_01", `8` = "Ciliated cells",
`9` = "Mesothelial cells", `10` = "Pericytes",
`11` = "misc_02", `12` = "Col14a1pos fibroblasts",
`13` = "misc_06" , `14` = "Vcam1pos ECs A",
`15` = "NK cells", `16` = "B cells",
`17` = "T cells A", `18` = "T cells B",
`19` = "T cells C", `20` = "Neuronal",
`21` = "Capillary ECs", `22` = "Vascular ECs A",
`23` = "Other ECs", `24` = "Vcam1pos ECs B",
`25` = "Vascular ECs B", `26` = "misc_03",
`27` = "AT2", `28` = "Alveolar mø A",
`29` = "Alveolar mø B", `30` = "Alveolar mø prolif",
`31` = "DCs", `32` = "misc_04", `33` = "Monocytes A",
`34` = "Monocytes B", `35` = "Interstitial mø")
df <- RenameIdents(object = df, rename_clusters)
df$celltype <- Idents(object = df)
# Re-cluster for T-cells
T_cells <- subset(df, idents = c("T cells A", "T cells B", "T cells C"))
T_cells <- FindVariableFeatures(T_cells)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -3.204
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6491e-14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.22764
T_cells <- ScaleData(T_cells)
## Centering and scaling data matrix
T_cells <- RunPCA(T_cells)
## PC_ 1
## Positive: Il23r, Ccr7, Il2ra, Cd163l1, Myo3b, Ctla4, Ikzf2, Pdcd1, Il17rb, Ifngas1
## Neb, Il21r, Got1, Gm44174, Cd40lg, Nr4a3, Elovl6, 5830411N06Rik, Irf4, Rgcc
## Dnah8, Tnfsf11, Ar, Trdc, Ptpn13, Tagap, Car12, Blk, 1600014C10Rik, Kcnc1
## Negative: Ptprg, Galnt18, Adgrf5, Prickle2, Calcrl, Epas1, Bmpr2, Adgrl3, Ptprb, Ptprm
## Fendrr, Ldb2, Flt1, Slco2a1, Shank3, Nckap5, Cd93, Nfib, Clic4, Pard3b
## Cdk14, Cdh5, Tcf4, Tspan7, Pde3a, Rapgef5, Ace, Cyyr1, Cd36, Adgrl2
## PC_ 2
## Positive: Prickle2, Rgs6, Ednrb, 4930554G24Rik, Rgs12, Tmem100, St5, Pde8b, Atp8a1, Tbx3os1
## Gnao1, Ncald, Scn7a, Aff3, Tmcc2, Fendrr, Car4, Cyp4b1, Sema3c, Grb14
## Galnt18, Smad6, Egflam, Psd3, Clec1a, Calcrl, Nckap5, Clic5, Afap1l1, Meis1
## Negative: Mki67, Top2a, Cenpf, Knl1, Kif15, Prc1, Aspm, Kif11, Diaph3, Cenpe
## Kif4, Neil3, Iqgap3, Anln, Ncapg2, Cit, Smc2, Kif14, Spag5, Incenp
## Ect2, Cenpp, Tpx2, Rad51b, Kntc1, Nusap1, Ccna2, Pclaf, Tubb5, Efcab11
## PC_ 3
## Positive: Ptprb, Acer2, Cadm1, Hmcn1, Plcb1, Mctp1, Tek, Prex2, Ldb2, Vegfa
## Ntrk2, Adamts9, Vwf, Samd12, Cd93, Sema3c, Nckap5, Adgrl3, Ncald, Bmp6
## Egr1, Itga1, Atf3, Tmem2, Pde3a, Kit, Adamts1, Myrip, Mcf2l, Ntn1
## Negative: Rgs6, Ednrb, Pde8b, Igfbp7, Lmntd1, Emp2, Tmeff2, Egflam, Carmil1, Kdr
## 2610203C22Rik, Grb14, Tbx3os1, Car4, Sema5a, D630045J12Rik, Slc22a23, Itga3, Kalrn, Chst1
## Ccdc68, Kitl, Dpysl5, Gnao1, Ampd3, Tln2, Tbx3, Cyp4b1, Dapk1, Mertk
## PC_ 4
## Positive: Il1b, Nr4a3, St6galnac3, Mpp7, Il1r2, Ncf2, Nfkb1, Fosb, Ern1, Il2ra
## Furin, Pde7b, Hdc, Itgav, Antxr2, Il23r, Il1rl1, Tpd52, Lyn, Igf1r
## Fos, Gm49339, Alcam, Syk, Thbs1, Smox, Trem1, Litaf, Cers6, Csf2rb
## Negative: Hpgd, Nckap5, Hmcn1, Cenpf, Tmem100, Sema3c, Ccdc85a, Myzap, Samd12, Tspan18
## Fmo1, Calcrl, Mki67, Top2a, Fendrr, Scn7a, Clec1a, Aspm, Mcc, Kif4
## Cavin2, Cenpe, Cadps2, Prickle2, 9530026P05Rik, Pde3a, Tspan7, Neil3, Ncald, Knl1
## PC_ 5
## Positive: Selp, Emp1, Adamts4, Prkcg, Il1b, Gda, Actn1, Serpine1, Tinagl1, Vcam1
## Nr2f2, Akap12, Lyve1, Adamts9, Cmss1, Adamts1, Tgm2, Col15a1, Cgnl1, Tm4sf1
## S100a9, Timp3, Osmr, Zfp423, Prss23, Ncf2, Ptgs2, Col4a1, Tmem252, Litaf
## Negative: St6galnac3, Il2ra, Il23r, Large1, Cd163l1, Pard3, Adam12, Blk, Igf1r, Il1rl1
## Gata3, Clnk, Il1r1, Mpp7, Myo1e, Nfkb1, Il17rb, Ret, Ern1, Fstl4
## Trdc, Ablim3, Nav2, Itgav, Furin, Abi3bp, Ptpn13, Pdcd1, Ikzf2, Dach2
ElbowPlot(T_cells)
T_cells <- RunUMAP(T_cells, dims = 1:20)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:47:47 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:47:47 Read 3199 rows and found 20 numeric columns
## 11:47:47 Using Annoy for neighbor search, n_neighbors = 30
## 11:47:47 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:47:48 Writing NN index file to temp file /var/folders/rl/gfmdpywx275c9cc9s2w1j3sm0000gn/T//Rtmp1rrbK2/file3f077ad0ce10
## 11:47:48 Searching Annoy index using 1 thread, search_k = 3000
## 11:47:50 Annoy recall = 100%
## 11:47:52 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 11:47:54 Initializing from normalized Laplacian + noise (using irlba)
## 11:47:54 Commencing optimization for 500 epochs, with 146382 positive edges
## 11:48:01 Optimization finished
T_cells <- FindNeighbors(T_cells, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
T_cells <- FindClusters(T_cells, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3199
## Number of edges: 125333
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8277
## Number of communities: 10
## Elapsed time: 0 seconds
# Find DEG for T-cells
markers_T_cells <- FindAllMarkers(T_cells, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
markers_by_T_cells <-
markers_T_cells |>
group_by(cluster) |>
slice_max(n = 5, order_by = avg_log2FC)
T_cells.averages <- AverageExpression(T_cells, return.seurat = TRUE)
## Centering and scaling data matrix
# Rename the clusters for T-cells
rename_clusters_T_cells <- c(`0` = "misc_05", `1` = "T cells",
`2` = "T cells naive", `3` = "Tregs",
`4` = "misc_05", `5` = "misc_05",
`6` = "misc_05", `7` = "T cells proliferating",
`8` = "misc_05", `9` = "misc_05")
T_cells <- RenameIdents(object = T_cells, rename_clusters_T_cells)
Idents(T_cells) <- factor(Idents(T_cells),
levels = levels(Idents(T_cells)), ordered = TRUE)
T_cells$celltype <- Idents(object = T_cells)
# Rename clusters in main DEG object
df$sub_cluster <- as.character(Idents(df))
df$sub_cluster[Cells(T_cells)] <- as.character(Idents(T_cells))
Idents(df) <- df$sub_cluster
df$celltype <- Idents(df)
# Re-order clusters
levels_clusters <- c("Alveolar mø A", "Alveolar mø B",
"Alveolar mø prolif", "Interstitial mø",
"Monocytes A", "Monocytes B",
"Granulocytes", "DCs",
"NK cells",
"T cells naive", "T cells",
"Tregs", "T cells proliferating",
"B cells",
"Myofibroblasts", "Lipofibroblasts" ,
"Col14a1pos fibroblasts",
"Capillary ECs", "Vascular ECs A",
"Vascular ECs B", "Other ECs",
"Vcam1pos ECs A", "Vcam1pos ECs B",
"Pericytes",
"AT1", "AT2",
"Ciliated cells",
"Airway epithelial A", "Airway epithelial B",
"Mesothelial cells",
"Neuronal",
"misc_01", "misc_02", "misc_03",
"misc_04", "misc_05", "misc_06")
Idents(df) <- factor(Idents(df),
levels = levels_clusters,
ordered = TRUE)
saveRDS(df, "data/Seurat_processed.RDS")
# Find differentially expressed genes, this is a slow step (~20-30min)
cluster.averages <- AverageExpression(object = df, return.seurat = TRUE)
## Centering and scaling data matrix
markers <- FindAllMarkers(df, only.pos = TRUE, min.pct = 0.25)
## Calculating cluster Alveolar mø A
## Calculating cluster Alveolar mø B
## Calculating cluster Alveolar mø prolif
## Calculating cluster Interstitial mø
## Calculating cluster Monocytes A
## Calculating cluster Monocytes B
## Calculating cluster Granulocytes
## Calculating cluster DCs
## Calculating cluster NK cells
## Calculating cluster T cells naive
## Calculating cluster T cells
## Calculating cluster Tregs
## Calculating cluster T cells proliferating
## Calculating cluster B cells
## Calculating cluster Myofibroblasts
## Calculating cluster Lipofibroblasts
## Calculating cluster Col14a1pos fibroblasts
## Calculating cluster Capillary ECs
## Calculating cluster Vascular ECs A
## Calculating cluster Vascular ECs B
## Calculating cluster Other ECs
## Calculating cluster Vcam1pos ECs A
## Calculating cluster Vcam1pos ECs B
## Calculating cluster Pericytes
## Calculating cluster AT1
## Calculating cluster AT2
## Calculating cluster Ciliated cells
## Calculating cluster Airway epithelial A
## Calculating cluster Airway epithelial B
## Calculating cluster Mesothelial cells
## Calculating cluster Neuronal
## Calculating cluster misc_01
## Calculating cluster misc_02
## Calculating cluster misc_03
## Calculating cluster misc_04
## Calculating cluster misc_05
## Calculating cluster misc_06
saveRDS(markers, "data/diff_markers.RDS")
df <- readRDS("data/Seurat_processed.RDS")
# Filter
# Define clusters to filter
clusters_to_filter <- grep("misc", levels(Idents(df)), value = TRUE)
# Remove ambiguous clusters
df_filt <- subset(df, idents = clusters_to_filter,
invert = TRUE)
df_filt$celltype <- droplevels(df_filt$celltype)
df_filt.averages <- AverageExpression(df_filt, return.seurat = TRUE)
## Centering and scaling data matrix
celltypes <- as.character(Idents(df_filt))
# Group cell types
df_filt$celltype_grouped <- case_when(grepl("Monocyte", celltypes) ~ "Monocytes",
grepl("Alveolar", celltypes) ~ "Alveolar Mø",
grepl("T cells", celltypes) ~ "T cells",
grepl("Fibro", celltypes, ignore.case = T) ~ "Fibroblasts",
grepl("ECs", celltypes) ~ "Endothelial",
grepl("AT1|AT2|epith|Meso|Cilia", celltypes) ~ "Epithelium",
TRUE ~ celltypes)
df_filt$celltype_grouped <-
factor(df_filt$celltype_grouped,
levels = c("Alveolar Mø", "Interstitial mø", "Monocytes", "Granulocytes",
"DCs", "NK cells", "T cells", "Tregs",
"B cells", "Endothelial", "Pericytes", "Fibroblasts",
"Epithelium", "Neuronal"),
ordered = TRUE)
saveRDS(df_filt, "data/df_filt.RDS")
df_filt.averages <- AverageExpression(df_filt, return.seurat = TRUE)
## Centering and scaling data matrix
# Subset data
df_inf <- subset(df_filt, condition == "inf")
df_ctrl <- subset(df_filt, condition == "ctrl")
# df_immune
immune_subsets <- names(table(df_filt$celltype_grouped))[1:9]
df_immune <- subset(df_filt, celltype_grouped %in% immune_subsets)
df_immune$celltype_grouped <- droplevels(df_immune$celltype_grouped)
df_immune$celltype <- droplevels(df_immune$celltype)
# df_immune_inf
df_immune_inf <- subset(df_filt, celltype_grouped %in% immune_subsets & condition == "inf")
df_immune_inf$celltype_grouped <- droplevels(df_immune_inf$celltype_grouped)
df_immune_inf$celltype <- droplevels(df_immune_inf$celltype)
# df_nonimmune
nonimmune_subsets <- names(table(df_filt$celltype_grouped))[10:14]
df_nonimmune <- subset(df_filt, celltype_grouped %in% nonimmune_subsets)
df_nonimmune$celltype_grouped <- droplevels(df_nonimmune$celltype_grouped)
df_nonimmune$celltype <- droplevels(df_nonimmune$celltype)
# df_nonimmune_inf
df_nonimmune_inf <- subset(df_filt, celltype_grouped %in% nonimmune_subsets & condition == "inf")
df_nonimmune_inf$celltype_grouped <- droplevels(df_nonimmune_inf$celltype_grouped)
df_nonimmune_inf$celltype <- droplevels(df_nonimmune_inf$celltype)
# Extract UMAP data for df_filt, df_inf, and df_ctrl
extract_UMAP <- function(seurat_object){
UMAP <- as.data.frame(Embeddings(seurat_object, reduction = "umap"))
UMAP$genotype <- seurat_object$genotype
UMAP$celltype <- Idents(seurat_object)
UMAP$celltype_grouped <- seurat_object$celltype_grouped
UMAP$condition <- seurat_object$condition
return(UMAP)
}
UMAP_df_filt <- extract_UMAP(df_filt)
UMAP_inf <- extract_UMAP(df_inf)
UMAP_ctrl <- extract_UMAP(df_ctrl)
# Plot the clusters with cell type shown
library(ggplot2)
#pdf(file = 'UMAP_all.pdf')
ggplot(data = UMAP_df_filt, aes(x = UMAP_1, y = UMAP_2, col = celltype_grouped)) +
geom_point() +
xlab("UMAP1") +
ylab("UMAP2")
#dev.off()
# Fig 3e
#pdf(file = 'UMAP_condition.pdf')
plot_density(UMAP_df_filt, "condition")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
#dev.off()
# Fig 3f
#pdf(file = 'UMAP_genotype.pdf')
plot_density(UMAP_inf, "genotype")
#dev.off()
# Prepare Fig 3g - Gene Set Enrichment Analysis
# Calculate DGEA for infected cells across genotypes
# Include all genes by setting parameters to `-Inf` for gene ranking
Idents(df_inf) <- df_inf$celltype_grouped
for (genotype in c("E2", "E4")) {
if (!(file.exists(paste0("data/DGEA_geneLists_", genotype, ".rds")))) {
if (!(file.exists(paste0("data/DGEA_", genotype, ".rds")))) {
DGEA_all_clusters <- vector("list", length = nlevels(df_inf))
names(DGEA_all_clusters) <- levels(df_inf)
for (cluster in levels(df_inf)) {
print(cluster)
DGEA_all_clusters[[cluster]] <-
FindMarkers(df_inf,
group.by = "genotype",
subset.ident = cluster,
ident.1 = genotype,
ident.2 = "E3",
min.pct = -Inf,
logfc.threshold = -Inf,
min.diff.pct = -Inf
)
saveRDS(DGEA_all_clusters, paste0("data/DGEA_", genotype, ".rds"))
}
}
DGEA_all_clusters <- readRDS(paste0("data/DGEA_", genotype, ".rds"))
length(DGEA_all_clusters)
# Data wrangling
DGEA_all_clusters_ranked <- vector("list", length = length(DGEA_all_clusters))
names(DGEA_all_clusters_ranked) <- names(DGEA_all_clusters)
for (cluster in seq_along(names(DGEA_all_clusters))) {
DGEA_cluster_subset <- DGEA_all_clusters[[cluster]]
# Calculate ranking metric for GSEA
DGEA_cluster_subset$ranking_metric <-
-log10(DGEA_cluster_subset$p_val) / sign(DGEA_cluster_subset$avg_log2FC)
DGEA_cluster_subset <-
DGEA_cluster_subset[order(DGEA_cluster_subset$ranking_metric, decreasing = TRUE), ]
# Add Entrez ID
DGEA_cluster_subset$entrez <- AnnotationDbi::mapIds(org.Mm.eg.db,
keys = rownames(DGEA_cluster_subset),
column = "ENTREZID",
keytype = "ALIAS",
multiVals = "first")
geneList_cluster_subset <- DGEA_cluster_subset$ranking_metric
names(geneList_cluster_subset) <- DGEA_cluster_subset$entrez
geneList_cluster_subset <- geneList_cluster_subset[!(is.na(geneList_cluster_subset))]
geneList_cluster_subset <- geneList_cluster_subset[!duplicated(names(geneList_cluster_subset))]
DGEA_all_clusters_ranked[[cluster]] <- geneList_cluster_subset
}
saveRDS(DGEA_all_clusters_ranked, paste0("data/DGEA_geneLists_", genotype, ".rds"))
}
}
# Plot Fig 3g
clusters <- levels(df_inf)
clusters_immune <- clusters[c(1:14)]
clusters_fibroblasts <- clusters[c(15:17)]
clusters_endothelial <- clusters[c(18:23)]
clusters_epithelial <- clusters[c(25:30)]
clusters_other <- clusters[c(24, 31)]
# GSEA using hallmark pathways
H_pathways_mm <-
msigdbr(species = "Mus musculus", category = "H") %>%
dplyr::select(gs_name, entrez_gene) %>%
mutate(gs_name = gsub("HALLMARK_", "", gs_name)) %>%
mutate(gs_name = gsub("_", " ", gs_name)) %>%
mutate(gs_name = str_to_sentence(gs_name))
immune_hallmark <- levels(as.factor(H_pathways_mm$gs_name))[c(2, 4, 7, 10:11, 22:27, 42, 44, 45, 46, 49)]
H_pathways_mm <- filter(H_pathways_mm, gs_name %in% immune_hallmark)
for (genotype in c("E2", "E4")) {
# Run GSEA
if (!(file.exists(paste0("data/gsea_hallmark_", genotype, ".rds")))) {
DGEA_all_clusters_ranked <- readRDS(paste0("data/DGEA_geneLists_", genotype, ".rds"))
gsea_all_clusters_H <- vector("list", length = length(DGEA_all_clusters_ranked))
names(gsea_all_clusters_H) <- names(DGEA_all_clusters_ranked)
for (cluster in names(DGEA_all_clusters_ranked)) {
print(cluster)
gsea_cluster_H <- clusterProfiler::GSEA(DGEA_all_clusters_ranked[[cluster]],
TERM2GENE = H_pathways_mm,
nPerm = 10000,
pvalueCutoff = 0.1,
pAdjustMethod = "BH")
gsea_cluster_H <- DOSE::setReadable(gsea_cluster_H, org.Mm.eg.db, keyType = "ENTREZID")
res_gsea_cluster_H <- as.data.frame(gsea_cluster_H)
gsea_all_clusters_H[[cluster]] <- res_gsea_cluster_H
}
saveRDS(gsea_all_clusters_H, paste0("data/gsea_hallmark_", genotype, ".rds"))
}
gsea_all_clusters_H <- readRDS(paste0("data/gsea_hallmark_", genotype, ".rds"))
length(gsea_all_clusters_H)
gsea_all_clusters_H_red <- gsea_all_clusters_H[sapply(gsea_all_clusters_H, nrow) > 0]
# Wrangle for plotting
gsea_all_clusters_H_mut <- vector("list", length(gsea_all_clusters_H_red))
names(gsea_all_clusters_H_mut) <- names(gsea_all_clusters_H_red)
for (cluster in names(gsea_all_clusters_H_red)) {
# Wrangle output of GSEA for nicer plotting
gsea_cluster_count_H <-
gsea_all_clusters_H_red[[cluster]] %>%
group_by(ID) %>%
summarise(count = sum(str_count(core_enrichment, "/")) + 1)
gsea_cluster_H_df<-
left_join(gsea_all_clusters_H_red[[cluster]], gsea_cluster_count_H, by = "ID") %>%
mutate(GeneRatio = count/setSize ,
type = case_when(NES < 0 ~ "Suppressed",
NES > 0 ~ "Activated")) %>%
mutate(type = ordered(type))
gsea_all_clusters_H_mut[[cluster]] <- gsea_cluster_H_df
}
saveRDS(gsea_all_clusters_H_mut, paste0("data/GSEA_H_mut_", genotype, ".RDS"))
}
clusters <- levels(df_inf$celltype_grouped)
for (genotype in c("E2", "E4")) {
ls_gsea_H <- vector("list")
gsea_all_clusters_H_mut <- readRDS(paste0("data/GSEA_H_mut_", genotype, ".RDS"))
for (celltype in names(gsea_all_clusters_H_mut)){
df_for_each_cluster <- dplyr::select(gsea_all_clusters_H_mut[[celltype]],
Description, enrichmentScore, p.adjust)
ls_gsea_H[[celltype]] <- df_for_each_cluster
}
df_gsea_H <-
bind_rows(ls_gsea_H, .id = "cluster") %>%
pivot_wider(names_from = cluster, values_from = enrichmentScore, id_cols = Description)
## Add no hits to clusters with no significant pathway enrichment
missing_clusters <- clusters[!clusters %in% names(ls_gsea_H)]
missing_clusters_df <- data.frame(matrix(ncol = length(missing_clusters),
nrow = nrow(df_gsea_H)))
rownames(missing_clusters_df) <- df_gsea_H$Description
colnames(missing_clusters_df) <- missing_clusters
df_gsea_h_combined <-
as_tibble(missing_clusters_df, rownames = "Description") |>
full_join(df_gsea_H)
df_gsea_h_combined <- as.data.frame(df_gsea_h_combined)
rownames(df_gsea_h_combined) <- df_gsea_h_combined$Description
mtx_gsea_H <- as.matrix(df_gsea_h_combined[, clusters])
rownames(mtx_gsea_H) <- df_gsea_h_combined$Description
mtx_gsea_H[is.na(mtx_gsea_H)] <- 0
#pdf(file = paste0("GSEA_H_mut_", genotype, ".pdf"))
print(
Heatmap(mtx_gsea_H, border = TRUE,
show_row_dend = FALSE, show_column_dend = FALSE, cluster_columns = FALSE,
row_names_gp = gpar(fontsize = 5),
column_names_gp = gpar(fontsize = 5),
col = circlize::colorRamp2(c(-1, 0, 1),
c("blue", "grey 95", "red")),
rect_gp = gpar(col = "white", lwd = 1), row_order = immune_hallmark,
heatmap_legend_param = list(grid_width = unit(0.2, "cm"),
legend_height = unit(0.4, "cm"))))
#dev.off()
}
## Joining, by = "Description"
## Warning: `legend_height` you specified is too small, use the default minimal
## height.
## Warning: `legend_height` you specified is too small, use the default minimal
## height.
## Warning: `legend_height` you specified is too small, use the default minimal
## height.
## Joining, by = "Description"
## Warning: `legend_height` you specified is too small, use the default minimal
## height.
## Warning: `legend_height` you specified is too small, use the default minimal
## height.
## Warning: `legend_height` you specified is too small, use the default minimal
## height.